# Load the philosophy corpus from disk into a data frame.
data <- read.csv(file = "~/gr5243/philosophy_data.csv")
# Show every distinct value of the author column.
unique(data[["author"]])
 [1] "Plato"           "Aristotle"       "Locke"           "Hume"            "Berkeley"        "Spinoza"         "Leibniz"         "Descartes"      
 [9] "Malebranche"     "Russell"         "Moore"           "Wittgenstein"    "Lewis"           "Quine"           "Popper"          "Kripke"         
[17] "Foucault"        "Derrida"         "Deleuze"         "Merleau-Ponty"   "Husserl"         "Heidegger"       "Kant"            "Fichte"         
[25] "Hegel"           "Marx"            "Lenin"           "Smith"           "Ricardo"         "Keynes"          "Epictetus"       "Marcus Aurelius"
[33] "Nietzsche"       "Wollstonecraft"  "Beauvoir"        "Davis"          
# Show every distinct value of the original_publication_date column.
unique(data[["original_publication_date"]])
 [1] -350 -320 1689 1739 1779 1713 1710 1677 1637 1641 1674 1921 1912 1910 1953 1985 1950 1959 1972 1975 1963 1961 1966 1967 1968 1945 1936 1907 1927
[30] 1788 1790 1781 1798 1817 1807 1820 1883 1848 1862 1776  125  170 1888 1886 1887 1792 1949 1981
# dplyr provides count() and anti_join() (and attaches the %>% pipe); it must
# be attached before the first pipeline below, not further down the script.
library(dplyr)
library(tidytext)

# One row per token: split every tokenized_txt entry into individual words.
data_words <- data %>%
  unnest_tokens(word, tokenized_txt)

# Number of occurrences of each word for each author (ungrouped result).
data_count <- data_words %>%
  count(author, word)

# Remove stop words. The join key is named explicitly so the join does not
# silently pick up any other column the two tables might share.
data(stop_words)
data_count_use <- data_count %>%
  anti_join(stop_words, by = "word")
# Attach the packages this step relies on before their functions are called.
library(dplyr)
library(ggplot2)

# Keep, for each author, the word with the highest count (ties are kept),
# then drop the grouping so it does not leak into later steps.
topwords <- data_count_use %>%
  group_by(author) %>%
  top_n(1, n) %>%
  ungroup()

# Horizontal bars: one per top word, filled by author.
ggplot(topwords, aes(x = reorder(word, n), y = n, fill = author)) +
  geom_col() +
  coord_flip() +
  ggtitle("Top common word by each author") +
  xlab("Word") +
  ylab("Frequency")

library(dplyr)
library(ggplot2)
# Mean of sentence_length for every author.
data_avg <- summarise(
  group_by(data, author),
  avg_sentence_length = mean(sentence_length)
)

# One bar per author; bar length is that author's mean sentence length.
ggplot(
  data = data_avg,
  mapping = aes(x = avg_sentence_length, y = author)
) +
  geom_col() +
  labs(
    title = "Average sentence length by author",
    x = "Average sentence length",
    y = "Author"
  )

library(ggplot2)
# Mean of sentence_length for every school.
by_school <- group_by(data, school)
data_avgschool <- summarise(
  by_school,
  avg_sentence_length1 = mean(sentence_length)
)

# One bar per school; bar length is that school's mean sentence length.
ggplot(
  data = data_avgschool,
  mapping = aes(x = avg_sentence_length1, y = school)
) +
  geom_col() +
  labs(
    title = "Average sentence length by school",
    x = "Average sentence length",
    y = "School"
  )

# Word frequencies for the rows authored by Descartes, most frequent first.
descartes <- data %>%
  filter(author == "Descartes")
descartes_word <- descartes %>%
  unnest_tokens(word, tokenized_txt) %>%
  count(word, sort = TRUE)

# Remove stop words; the join key is stated explicitly instead of being
# inferred from whichever column names the two tables happen to share.
descartes_word_use <- descartes_word %>%
  anti_join(stop_words, by = "word")

# The table is still sorted by descending count, so head/tail give the
# 20 most and 20 least frequent remaining words.
descartes_top <- head(descartes_word_use, 20)
descartes_bot <- tail(descartes_word_use, 20)

# Bar chart of the 20 most frequent words, ordered by count.
ggplot(descartes_top, aes(x = reorder(word, n), y = n)) +
  geom_col() +
  ggtitle("Top 20 words by Descartes") +
  xlab("Word") +
  ylab("Frequency")

# Word frequencies for the rows authored by Wittgenstein, most frequent first.
wittgenstein <- data %>%
  filter(author == "Wittgenstein")
wittgenstein_word <- wittgenstein %>%
  unnest_tokens(word, tokenized_txt) %>%
  count(word, sort = TRUE)

# Remove stop words; the join key is stated explicitly instead of being
# inferred from whichever column names the two tables happen to share.
wittgenstein_word_use <- wittgenstein_word %>%
  anti_join(stop_words, by = "word")

# The table is still sorted by descending count, so head/tail give the
# 20 most and 20 least frequent remaining words.
wittgenstein_top <- head(wittgenstein_word_use, 20)
wittgenstein_bot <- tail(wittgenstein_word_use, 20)

# Bar chart of the 20 most frequent words, ordered by count.
ggplot(wittgenstein_top, aes(x = reorder(word, n), y = n)) +
  geom_col() +
  ggtitle("Top 20 words by Wittgenstein") +
  xlab("Word") +
  ylab("Frequency")

library(tidytext)
library(textdata)
# Print the NRC lexicon as returned by tidytext.
get_sentiments("nrc")

# NRC entries whose sentiment is "positive".
d_nrc_positive <- get_sentiments("nrc") %>%
  filter(sentiment == "positive")

# Positive-tagged words used by Descartes, most frequent first.
# The join key is explicit so the match is on word only.
data_words %>%
  filter(author == "Descartes") %>%
  inner_join(d_nrc_positive, by = "word") %>%
  count(word, sort = TRUE)
# NRC entries whose sentiment is "negative".
d_nrc_negative <- get_sentiments("nrc") %>%
  filter(sentiment == "negative")

# Negative-tagged words used by Descartes, most frequent first.
# The join key is explicit so the match is on word only.
data_words %>%
  filter(author == "Descartes") %>%
  inner_join(d_nrc_negative, by = "word") %>%
  count(word, sort = TRUE)
# NRC entries whose sentiment is "positive".
w_nrc_positive <- get_sentiments("nrc") %>%
  filter(sentiment == "positive")

# Positive-tagged words used by Wittgenstein, most frequent first.
# The join key is explicit so the match is on word only.
data_words %>%
  filter(author == "Wittgenstein") %>%
  inner_join(w_nrc_positive, by = "word") %>%
  count(word, sort = TRUE)
# NRC entries whose sentiment is "negative".
w_nrc_negative <- get_sentiments("nrc") %>%
  filter(sentiment == "negative")

# Negative-tagged words used by Wittgenstein, most frequent first.
# The join key is explicit so the match is on word only.
data_words %>%
  filter(author == "Wittgenstein") %>%
  inner_join(w_nrc_negative, by = "word") %>%
  count(word, sort = TRUE)
library(sentimentr)
# Sentiment scores for the sentence_spacy text of the Descartes rows.
descartes_data <- data[data$author == "Descartes", ]
descartes_data1 <- descartes_data$sentence_spacy

# Run get_sentences() first: handing sentiment() a raw character vector makes
# it redo sentence boundary detection on every call and emit a warning.
sentiment_scores <- sentiment(get_sentences(descartes_data1))
head(sentiment_scores, 20)
library(sentimentr)
# Sentiment scores for the sentence_spacy text of the Wittgenstein rows.
wittgenstein_data <- data[data$author == "Wittgenstein", ]
wittgenstein_data1 <- wittgenstein_data$sentence_spacy

# Run get_sentences() first: handing sentiment() a raw character vector makes
# it redo sentence boundary detection on every call and emit a warning.
sentiment_scores1 <- sentiment(get_sentences(wittgenstein_data1))
head(sentiment_scores1, 20)
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQoNCg0KYGBge3J9DQpkYXRhIDwtIHJlYWQuY3N2KCd+L2dyNTI0My9waGlsb3NvcGh5X2RhdGEuY3N2JykNCmBgYA0KDQoNCmBgYHtyfQ0KdW5pcXVlKGRhdGEkYXV0aG9yKQ0KYGBgDQpgYGB7cn0NCnVuaXF1ZShkYXRhJG9yaWdpbmFsX3B1YmxpY2F0aW9uX2RhdGUpDQpgYGANCmBgYHtyfQ0KbGlicmFyeSh0aWR5dGV4dCkNCmxpYnJhcnkoZ2dwbG90MikNCmRhdGFfd29yZHMgPC0gZGF0YSAlPiUNCiAgdW5uZXN0X3Rva2Vucyh3b3JkLCB0b2tlbml6ZWRfdHh0KQ0KZGF0YV9jb3VudCA8LSBkYXRhX3dvcmRzICU+JQ0KICBncm91cF9ieShhdXRob3IsIHdvcmQpICU+JQ0KICBjb3VudCgpDQpkYXRhKHN0b3Bfd29yZHMpDQpkYXRhX2NvdW50X3VzZSA8LSBkYXRhX2NvdW50ICU+JQ0KICBhbnRpX2pvaW4oc3RvcF93b3JkcykNCnRvcHdvcmRzIDwtIGRhdGFfY291bnRfdXNlICU+JQ0KICBncm91cF9ieShhdXRob3IpICU+JQ0KICB0b3BfbigxLCBuKQ0KZ2dwbG90KHRvcHdvcmRzLCBhZXMoeCA9IHJlb3JkZXIod29yZCwgbiksIHkgPSBuLCBmaWxsID0gYXV0aG9yKSkgKw0KICBnZW9tX2NvbCgpICsNCiAgeGxhYihOVUxMKSArDQogIGNvb3JkX2ZsaXAoKSArDQogIGdndGl0bGUoIlRvcCBjb21tb24gd29yZCBieSBlYWNoIGF1dGhvciIpICsNCiAgeGxhYigiV29yZCIpICsNCiAgeWxhYigiRnJlcXVlbmN5IikNCmBgYA0KDQpgYGB7cn0NCmxpYnJhcnkoZHBseXIpDQpkYXRhX2F2ZyA8LSBkYXRhICU+JQ0KICBncm91cF9ieShhdXRob3IpICU+JQ0KICBzdW1tYXJpemUoYXZnX3NlbnRlbmNlX2xlbmd0aCA9IG1lYW4oc2VudGVuY2VfbGVuZ3RoKSkNCmdncGxvdChkYXRhX2F2ZywgYWVzKHggPSBhdmdfc2VudGVuY2VfbGVuZ3RoLCB5ID0gYXV0aG9yKSkgKw0KICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IikgKw0KICBnZ3RpdGxlKCJBdmVyYWdlIHNlbnRlbmNlIGxlbmd0aCBieSBhdXRob3IiKSArDQogIHhsYWIoIkF2ZXJhZ2Ugc2VudGVuY2UgbGVuZ3RoIikgKw0KICB5bGFiKCJBdXRob3IiKQ0KYGBgDQoNCg0KYGBge3J9DQpsaWJyYXJ5KGdncGxvdDIpDQpkYXRhX2F2Z3NjaG9vbCA8LSBkYXRhICU+JQ0KICBncm91cF9ieShzY2hvb2wpICU+JQ0KICBzdW1tYXJpemUoYXZnX3NlbnRlbmNlX2xlbmd0aDEgPSBtZWFuKHNlbnRlbmNlX2xlbmd0aCkpDQpnZ3Bsb3QoZGF0YV9hdmdzY2hvb2wsIGFlcyh4ID0gYXZnX3NlbnRlbmNlX2xlbmd0aDEsIHkgPSBzY2hvb2wpKSArDQogIGdlb21fYmFyKHN0YXQgPSAiaWRlbnRpdHkiKSArDQogIGdndGl0bGUoIkF2ZXJhZ2Ugc2VudGVuY2UgbGVuZ3RoIGJ5IHNjaG9vbCIpICsNCiAgeGxhYigiQXZlcmFnZSBzZW50ZW5jZSBsZW5ndGgiKSArDQogIHlsYWIoIlNjaG9vbCIpDQpgYGANCg0KDQoNCg0KDQpgYGB7cn0NCmRlc2NhcnRlcyA8LSBkYXRhICU+JSBmaWx0ZXIoYXV0aG9yID09ICJEZXNj
YXJ0ZXMiKQ0KZGVzY2FydGVzX3dvcmQgPC0gZGVzY2FydGVzICU+JSB1bm5lc3RfdG9rZW5zKHdvcmQsIHRva2VuaXplZF90eHQpICU+JSBjb3VudCh3b3JkLCBzb3J0ID0gVFJVRSkNCmRlc2NhcnRlc193b3JkX3VzZSA8LSBkZXNjYXJ0ZXNfd29yZCAlPiUNCiAgYW50aV9qb2luKHN0b3Bfd29yZHMpDQpkZXNjYXJ0ZXNfdG9wIDwtIGhlYWQoZGVzY2FydGVzX3dvcmRfdXNlLCAyMCkNCmRlc2NhcnRlc19ib3QgPC0gdGFpbChkZXNjYXJ0ZXNfd29yZF91c2UsIDIwKQ0KZ2dwbG90KGRlc2NhcnRlc190b3AsIGFlcyh4ID0gcmVvcmRlcih3b3JkLCBuKSwgeSA9IG4pKSArDQogIGdlb21fY29sKCkgKw0KICB4bGFiKE5VTEwpICsNCiAgZ2d0aXRsZSgiVG9wIDIwIHdvcmRzIGJ5IERlc2NhcnRlcyIpICsNCiAgeGxhYigiV29yZCIpICsNCiAgeWxhYigiRnJlcXVlbmN5IikNCmBgYA0KDQpgYGB7cn0NCndpdHRnZW5zdGVpbiA8LSBkYXRhICU+JSBmaWx0ZXIoYXV0aG9yID09ICJXaXR0Z2Vuc3RlaW4iKQ0Kd2l0dGdlbnN0ZWluX3dvcmQgPC0gd2l0dGdlbnN0ZWluICU+JSB1bm5lc3RfdG9rZW5zKHdvcmQsIHRva2VuaXplZF90eHQpICU+JSBjb3VudCh3b3JkLCBzb3J0ID0gVFJVRSkNCndpdHRnZW5zdGVpbl93b3JkX3VzZSA8LSB3aXR0Z2Vuc3RlaW5fd29yZCAlPiUNCmFudGlfam9pbihzdG9wX3dvcmRzKQ0Kd2l0dGdlbnN0ZWluX3RvcCA8LSBoZWFkKHdpdHRnZW5zdGVpbl93b3JkX3VzZSwgMjApDQp3aXR0Z2Vuc3RlaW5fYm90IDwtIHRhaWwod2l0dGdlbnN0ZWluX3dvcmRfdXNlLCAyMCkNCmdncGxvdCh3aXR0Z2Vuc3RlaW5fdG9wLCBhZXMoeCA9IHJlb3JkZXIod29yZCwgbiksIHkgPSBuKSkgKw0KICBnZW9tX2NvbCgpICsNCiAgeGxhYihOVUxMKSArDQogIGdndGl0bGUoIlRvcCAyMCB3b3JkcyBieSBXaXR0Z2Vuc3RlaW4iKSArDQogIHhsYWIoIldvcmQiKSArDQogIHlsYWIoIkZyZXF1ZW5jeSIpDQpgYGANCg0KYGBge3J9DQpsaWJyYXJ5KHRpZHl0ZXh0KQ0KbGlicmFyeSh0ZXh0ZGF0YSkNCmdldF9zZW50aW1lbnRzKCJucmMiKQ0KYGBgDQoNCg0KYGBge3J9DQpkX25yY19wb3NpdGl2ZSA8LSBnZXRfc2VudGltZW50cygibnJjIikgJT4lDQogIGZpbHRlcihzZW50aW1lbnQgPT0gInBvc2l0aXZlIikNCg0KZGF0YV93b3JkcyAlPiUNCiAgZmlsdGVyKGF1dGhvciA9PSAiRGVzY2FydGVzIikgJT4lDQogIGlubmVyX2pvaW4oZF9ucmNfcG9zaXRpdmUpICU+JQ0KICBjb3VudCh3b3JkLHNvcnQgPSBUUlVFKQ0KYGBgDQoNCmBgYHtyfQ0KZF9ucmNfbmVnYXRpdmUgPC0gZ2V0X3NlbnRpbWVudHMoIm5yYyIpICU+JQ0KICBmaWx0ZXIoc2VudGltZW50ID09ICJuZWdhdGl2ZSIpDQoNCmRhdGFfd29yZHMgJT4lDQogIGZpbHRlcihhdXRob3IgPT0gIkRlc2NhcnRlcyIpICU+JQ0KICBpbm5lcl9qb2luKGRfbnJjX25lZ2F0aXZlKSAlPiUNCiAgY291bnQod29yZCxzb3J0ID0gVFJVRSkNCmBgYA0KDQoNCg0KYGBge3J9DQp3X25yY19wb3Np
dGl2ZSA8LSBnZXRfc2VudGltZW50cygibnJjIikgJT4lDQogIGZpbHRlcihzZW50aW1lbnQgPT0gInBvc2l0aXZlIikNCg0KZGF0YV93b3JkcyAlPiUNCiAgZmlsdGVyKGF1dGhvciA9PSAiV2l0dGdlbnN0ZWluIikgJT4lDQogIGlubmVyX2pvaW4od19ucmNfcG9zaXRpdmUpICU+JQ0KICBjb3VudCh3b3JkLHNvcnQgPSBUUlVFKQ0KYGBgDQpgYGB7cn0NCndfbnJjX25lZ2F0aXZlIDwtIGdldF9zZW50aW1lbnRzKCJucmMiKSAlPiUNCiAgZmlsdGVyKHNlbnRpbWVudCA9PSAibmVnYXRpdmUiKQ0KDQpkYXRhX3dvcmRzICU+JQ0KICBmaWx0ZXIoYXV0aG9yID09ICJXaXR0Z2Vuc3RlaW4iKSAlPiUNCiAgaW5uZXJfam9pbih3X25yY19uZWdhdGl2ZSkgJT4lDQogIGNvdW50KHdvcmQsc29ydCA9IFRSVUUpDQpgYGANCg0KDQpgYGB7cn0NCmxpYnJhcnkoc2VudGltZW50cikNCmRlc2NhcnRlc19kYXRhIDwtIGRhdGFbZGF0YSRhdXRob3IgPT0gIkRlc2NhcnRlcyIsXQ0KZGVzY2FydGVzX2RhdGExIDwtIGRlc2NhcnRlc19kYXRhJHNlbnRlbmNlX3NwYWN5DQpzZW50aW1lbnRfc2NvcmVzIDwtIHNlbnRpbWVudChkZXNjYXJ0ZXNfZGF0YTEpDQpoZWFkKHNlbnRpbWVudF9zY29yZXMsMjApDQpgYGANCg0KYGBge3J9DQpsaWJyYXJ5KHNlbnRpbWVudHIpDQp3aXR0Z2Vuc3RlaW5fZGF0YSA8LSBkYXRhW2RhdGEkYXV0aG9yID09ICJXaXR0Z2Vuc3RlaW4iLF0NCndpdHRnZW5zdGVpbl9kYXRhMSA8LSB3aXR0Z2Vuc3RlaW5fZGF0YSRzZW50ZW5jZV9zcGFjeQ0Kc2VudGltZW50X3Njb3JlczEgPC0gc2VudGltZW50KHdpdHRnZW5zdGVpbl9kYXRhMSkNCmhlYWQoc2VudGltZW50X3Njb3JlczEsMjApDQpgYGANCg0KDQo=